Biomineralization Toolkit GO EnrichmentAnalysis

sessionInfo() #provides list of loaded packages and version of R. 
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.34     R6_2.5.1          fastmap_1.1.1     xfun_0.42        
##  [5] cachem_1.0.8      knitr_1.45        htmltools_0.5.7   rmarkdown_2.25   
##  [9] lifecycle_1.0.4   cli_3.6.2         sass_0.4.8        jquerylib_0.1.4  
## [13] compiler_4.3.2    rstudioapi_0.15.0 tools_4.3.2       evaluate_0.23    
## [17] bslib_0.6.1       yaml_2.3.8        rlang_1.1.3       jsonlite_1.8.8

First, load the necessary packages.

# load libraries - notes show the install command needed to install (pre installed)
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(tidyr)
library(grDevices)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
## 
##     rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:geneLenDataBase':
## 
##     unfactor
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
## 
##     desc
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: XML
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
## 
##     addNode
## The following object is masked from 'package:circlize':
## 
##     degree
## The following object is masked from 'package:plyr':
## 
##     join
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
## The following objects are masked from 'package:zoo':
## 
##     yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(stringr)
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
## 
##     boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)

Biomineralization toolkit present in modules

biomin <- read.csv("../../output/Biomin_blast_Pocillopora_acuta_best_hit.csv") %>% rename("Pocillopora_acuta_best_hit" = "gene_id" )
geneInfo <- read.csv("../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv")
biomin_mod <- merge(biomin, geneInfo, by=c("gene_id"), all=F)
head(biomin_mod)
##                                     gene_id accessionnumber.geneID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2         XP_022804785.1
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1              P33_g8985
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1             JR972076.1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1            Gene:g13552
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1       aug_v2a.06327.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1             PFX18785.1
##                                                          definition
## 1 thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 2                                      Flagellar associated protein
## 3              Acidic skeletal organic matrix protein (Acidic SOMP)
## 4                                     Acidic SOMP (Full-Length p27)
## 5                                                            SAARP3
## 6                                   Mucin-4 [Stylophora pistillata]
##                         Ref    X                           seqnames   start
## 1        Peled et al., 2020  224 Pocillopora_acuta_HIv2___Sc0000021 2329764
## 2        Drake et al., 2013  604 Pocillopora_acuta_HIv2___Sc0000013 2026351
## 3  Ramos-Silva et al., 2013 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 4 Mummadisetti et al., 2021 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 5     Takeuchi et al., 2016 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 6        Peled et al., 2020 1240 Pocillopora_acuta_HIv2___Sc0000005 2364120
##       end width strand   source       type score.x phase
## 1 2349234 19471      + AUGUSTUS transcript      NA    NA
## 2 2032291  5941      - AUGUSTUS transcript      NA    NA
## 3 7864189  3795      + AUGUSTUS transcript      NA    NA
## 4 7864189  3795      + AUGUSTUS transcript      NA    NA
## 5 7864189  3795      + AUGUSTUS transcript      NA    NA
## 6 2382635 18516      + AUGUSTUS transcript      NA    NA
##                                          ID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
##                               transcript_id       seed_ortholog    evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2      45351.EDO38228 1.51e-306
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 6087.XP_002163848.1 4.11e-211
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1                <NA>        NA
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1                <NA>        NA
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1                <NA>        NA
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1                <NA>        NA
##   score.y
## 1     851
## 2     607
## 3      NA
## 4      NA
## 5      NA
## 6      NA
##                                                                                                                 eggNOG_OGs
## 1 COG0695@1|root,COG1249@1|root,KOG1752@2759|Eukaryota,KOG4716@2759|Eukaryota,38BHT@33154|Opisthokonta,3BBN5@33208|Metazoa
## 2                                           28N88@1|root,2QUTJ@2759|Eukaryota,39ZRG@33154|Opisthokonta,3BMP5@33208|Metazoa
## 3                                                                                                                     <NA>
## 4                                                                                                                     <NA>
## 5                                                                                                                     <NA>
## 6                                                                                                                     <NA>
##   max_annot_lvl COG_category                              Description
## 1 33208|Metazoa            C thioredoxin-disulfide reductase activity
## 2 33208|Metazoa            -                                        -
## 3          <NA>         <NA>                                     <NA>
## 4          <NA>         <NA>                                     <NA>
## 5          <NA>         <NA>                                     <NA>
## 6          <NA>         <NA>                                     <NA>
##   Preferred_name
## 1         TXNRD1
## 2              -
## 3           <NA>
## 4           <NA>
## 5           <NA>
## 6           <NA>
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            GOs
## 1 GO:0000003;GO:0000302;GO:0000305;GO:0001650;GO:0001704;GO:0001707;GO:0001887;GO:0001890;GO:0003006;GO:0003674;GO:0003824;GO:0004791;GO:0005488;GO:0005515;GO:0005575;GO:0005622;GO:0005623;GO:0005634;GO:0005654;GO:0005730;GO:0005737;GO:0005739;GO:0005783;GO:0005829;GO:0006082;GO:0006139;GO:0006518;GO:0006520;GO:0006575;GO:0006725;GO:0006732;GO:0006733;GO:0006739;GO:0006749;GO:0006753;GO:0006790;GO:0006793;GO:0006796;GO:0006807;GO:0006950;GO:0006979;GO:0007154;GO:0007165;GO:0007275;GO:0007369;GO:0007498;GO:0008150;GO:0008152;GO:0008283;GO:0009056;GO:0009069;GO:0009117;GO:0009611;GO:0009628;GO:0009636;GO:0009653;GO:0009790;GO:0009888;GO:0009987;GO:0010035;GO:0010038;GO:0010269;GO:0010941;GO:0010942;GO:0012505;GO:0015036;GO:0015949;GO:0016043;GO:0016174;GO:0016209;GO:0016259;GO:0016491;GO:0016651;GO:0016667;GO:0016668;GO:0016999;GO:0017001;GO:0017144;GO:0018996;GO:0019216;GO:0019222;GO:0019362;GO:0019637;GO:0019725;GO:0019752;GO:0022404;GO:0022414;GO:0022607;GO:0023052;GO:0031974;GO:0031981;GO:0032501;GO:0032502;GO:0033554;GO:0033797;GO:0034599;GO:0034641;GO:0036295;GO:0036296;GO:0036477;GO:0042221;GO:0042303;GO:0042395;GO:0042493;GO:0042537;GO:0042592;GO:0042737;GO:0042743;GO:0042744;GO:0042802;GO:0042803;GO:0043025;GO:0043167;GO:0043169;GO:0043226;GO:0043227;GO:0043228;GO:0043229;GO:0043231;GO:0043232;GO:0043233;GO:0043436;GO:0043603;GO:0043933;GO:0044085;GO:0044237;GO:0044238;GO:0044248;GO:0044281;GO:0044297;GO:0044422;GO:0044424;GO:0044428;GO:0044444;GO:0044446;GO:0044452;GO:0044464;GO:0045340;GO:0045454;GO:0046483;GO:0046496;GO:0046688;GO:0046872;GO:0046914;GO:0046983;GO:0048332;GO:0048513;GO:0048518;GO:0048522;GO:0048598;GO:0048608;GO:0048646;GO:0048678;GO:0048729;GO:0048731;GO:0048856;GO:0050664;GO:0050789;GO:0050794;GO:0050896;GO:0051186;GO:0051187;GO:0051259;GO:0051262;GO:0051716;GO:0055086;GO:0055093;GO:0055114;GO:0061458;GO:0065003;GO:0065007;GO:0065008;GO:0070013;GO:0070276;GO:0070482;GO:0070887;GO:0070995;GO:0071241;GO:0071248;GO:0071280;GO:0071453;GO:0071455;GO:0071704;GO:0071840;GO:0072524;GO:0072593;GO:0080090;GO:0097237;GO:0097458;GO:0098623;GO:0098625;GO:0098626;GO:0098754;GO:0098869;GO:1901360;GO:1901564;GO:1901605;GO:1901700;GO:1990748
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
##        EC   KEGG_ko                                       KEGG_Pathway
## 1 1.8.1.9 ko:K22182 ko00450,ko05200,ko05225,map00450,map05200,map05225
## 2       -         -                                                  -
## 3    <NA>      <NA>                                               <NA>
## 4    <NA>      <NA>                                               <NA>
## 5    <NA>      <NA>                                               <NA>
## 6    <NA>      <NA>                                               <NA>
##   KEGG_Module KEGG_Reaction     KEGG_rclass                   BRITE KEGG_TC
## 1           - R03596,R09372 RC02518,RC02873 ko00000,ko00001,ko01000       -
## 2           -             -               -                       -       -
## 3        <NA>          <NA>            <NA>                    <NA>    <NA>
## 4        <NA>          <NA>            <NA>                    <NA>    <NA>
## 5        <NA>          <NA>            <NA>                    <NA>    <NA>
## 6        <NA>          <NA>            <NA>                    <NA>    <NA>
##   CAZy BiGG_Reaction                                  PFAMs KEGG_new
## 1    -             - Glutaredoxin,Pyr_redox_2,Pyr_redox_dim   K22182
## 2    -             -                                      -     <NA>
## 3 <NA>          <NA>                                   <NA>     <NA>
## 4 <NA>          <NA>                                   <NA>     <NA>
## 5 <NA>          <NA>                                   <NA>     <NA>
## 6 <NA>          <NA>                                   <NA>   K22017
##                                substanceBXH                         geneSymbol
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 Pocillopora_acuta_HIv2___Sc0000021
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 Pocillopora_acuta_HIv2___Sc0000013
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 Pocillopora_acuta_HIv2___Sc0000005
##   moduleColor
## 1       brown
## 2   turquoise
## 3         red
## 4         red
## 5         red
## 6        pink
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       GO.terms
## 1 GO:0000003,GO:0000302,GO:0000305,GO:0001650,GO:0001704,GO:0001707,GO:0001887,GO:0001890,GO:0003006,GO:0003674,GO:0003824,GO:0004791,GO:0005488,GO:0005515,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005654,GO:0005730,GO:0005737,GO:0005739,GO:0005783,GO:0005829,GO:0006082,GO:0006139,GO:0006518,GO:0006520,GO:0006575,GO:0006725,GO:0006732,GO:0006733,GO:0006739,GO:0006749,GO:0006753,GO:0006790,GO:0006793,GO:0006796,GO:0006807,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007275,GO:0007369,GO:0007498,GO:0008150,GO:0008152,GO:0008283,GO:0009056,GO:0009069,GO:0009117,GO:0009611,GO:0009628,GO:0009636,GO:0009653,GO:0009790,GO:0009888,GO:0009987,GO:0010035,GO:0010038,GO:0010269,GO:0010941,GO:0010942,GO:0012505,GO:0015036,GO:0015949,GO:0016043,GO:0016174,GO:0016209,GO:0016259,GO:0016491,GO:0016651,GO:0016667,GO:0016668,GO:0016999,GO:0017001,GO:0017144,GO:0018996,GO:0019216,GO:0019222,GO:0019362,GO:0019637,GO:0019725,GO:0019752,GO:0022404,GO:0022414,GO:0022607,GO:0023052,GO:0031974,GO:0031981,GO:0032501,GO:0032502,GO:0033554,GO:0033797,GO:0034599,GO:0034641,GO:0036295,GO:0036296,GO:0036477,GO:0042221,GO:0042303,GO:0042395,GO:0042493,GO:0042537,GO:0042592,GO:0042737,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0043025,GO:0043167,GO:0043169,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043233,GO:0043436,GO:0043603,GO:0043933,GO:0044085,GO:0044237,GO:0044238,GO:0044248,GO:0044281,GO:0044297,GO:0044422,GO:0044424,GO:0044428,GO:0044444,GO:0044446,GO:0044452,GO:0044464,GO:0045340,GO:0045454,GO:0046483,GO:0046496,GO:0046688,GO:0046872,GO:0046914,GO:0046983,GO:0048332,GO:0048513,GO:0048518,GO:0048522,GO:0048598,GO:0048608,GO:0048646,GO:0048678,GO:0048729,GO:0048731,GO:0048856,GO:0050664,GO:0050789,GO:0050794,GO:0050896,GO:0051186,GO:0051187,GO:0051259,GO:0051262,GO:0051716,GO:0055086,GO:0055093,GO:0055114,GO:0061458,GO:0065003,GO:0065007,GO:0065008,GO:0070013,GO:0070276,GO:0070482,GO:0070887,GO:0070995,GO:0071241,GO:0071248,GO:0071280,GO:0071453,GO:0071455,GO:0071704,GO:0071840,GO:0072524,GO:0072593,GO:0080090,GO:0097237,GO:0097458,GO:0098623,GO:0098625,GO:0098626,GO:0098754,GO:0098869,GO:1901360,GO:1901564,GO:1901605,GO:1901700,GO:1990748
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            -
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
##                             GO.description     GS.Flat    GS.Slope    p.GS.Flat
## 1 thioredoxin-disulfide reductase activity  0.57178848 -0.57178848 3.311055e-05
## 2                                        - -0.29586493  0.29586493 4.589336e-02
## 3                                     <NA>  0.35628512 -0.35628512 1.508700e-02
## 4                                     <NA>  0.35628512 -0.35628512 1.508700e-02
## 5                                     <NA>  0.35628512 -0.35628512 1.508700e-02
## 6                                     <NA> -0.05455251  0.05455251 7.187880e-01
##     p.GS.Slope    A.brown    p.A.brown  A.magenta  p.A.magenta      A.red
## 1 3.311055e-05  0.7005073 5.973619e-08 -0.3738439 1.048844e-02  0.2901298
## 2 4.589336e-02 -0.4291375 2.921081e-03  0.3115539 3.505853e-02 -0.3452015
## 3 1.508700e-02  0.4914202 5.241968e-04 -0.6288605 2.864308e-06  0.6673892
## 4 1.508700e-02  0.4914202 5.241968e-04 -0.6288605 2.864308e-06  0.6673892
## 5 1.508700e-02  0.4914202 5.241968e-04 -0.6288605 2.864308e-06  0.6673892
## 6 7.187880e-01  0.0972208 5.203783e-01 -0.3252127 2.743096e-02  0.3709019
##        p.A.red A.turquoise p.A.turquoise   A.purple   p.A.purple    A.green
## 1 5.047695e-02 -0.43323233  2.634293e-03  0.6984202 6.792759e-08  0.4574538
## 2 1.879503e-02  0.58815287  1.720729e-05 -0.1784560 2.353887e-01 -0.1306835
## 3 4.071016e-07 -0.13892006  3.571825e-01  0.1198762 4.274677e-01  0.2378899
## 4 4.071016e-07 -0.13892006  3.571825e-01  0.1198762 4.274677e-01  0.2378899
## 5 4.071016e-07 -0.13892006  3.571825e-01  0.1198762 4.274677e-01  0.2378899
## 6 1.116224e-02  0.08164806  5.895928e-01 -0.1391597 3.563456e-01  0.1614616
##     p.A.green A.lightcyan p.A.lightcyan     A.pink     p.A.pink      A.blue
## 1 0.001391986  -0.3508191  1.682948e-02  0.1707384 2.565893e-01  0.12358439
## 2 0.386672688   0.1196505  4.283449e-01 -0.1522331 3.125037e-01 -0.58598406
## 3 0.111386103  -0.6473842  1.159989e-06  0.7188738 1.835918e-08  0.07448551
## 4 0.111386103  -0.6473842  1.159989e-06  0.7188738 1.835918e-08  0.07448551
## 5 0.111386103  -0.6473842  1.159989e-06  0.7188738 1.835918e-08  0.07448551
## 6 0.283719167  -0.5276145  1.646022e-04  0.6417477 1.537006e-06 -0.02286640
##       p.A.blue  A.salmon  p.A.salmon A.midnightblue p.A.midnightblue
## 1 4.132051e-01 0.1178467 0.435389343      0.2439890       0.10224333
## 2 1.880492e-05 0.1907995 0.204028320      0.2258109       0.13131383
## 3 6.227429e-01 0.4254256 0.003204458      0.2691022       0.07053914
## 4 6.227429e-01 0.4254256 0.003204458      0.2691022       0.07053914
## 5 6.227429e-01 0.4254256 0.003204458      0.2691022       0.07053914
## 6 8.801027e-01 0.2940377 0.047315397      0.2906592       0.05003895
##       A.black  p.A.black      A.cyan  p.A.cyan    A.yellow   p.A.yellow
## 1 -0.28430645 0.05550307  0.04904562 0.7461773  0.05522073 0.7154873547
## 2 -0.18825739 0.21023361  0.07386502 0.6256510 -0.14392338 0.3399558326
## 3  0.09618758 0.52484209  0.16699226 0.2673276 -0.38010677 0.0091694889
## 4  0.09618758 0.52484209  0.16699226 0.2673276 -0.38010677 0.0091694889
## 5  0.09618758 0.52484209  0.16699226 0.2673276 -0.38010677 0.0091694889
## 6  0.02103556 0.88964060 -0.13338389 0.3768501 -0.46998526 0.0009821983
##        A.tan     p.A.tan Length
## 1  0.2648346 0.075293267  19471
## 2  0.2613466 0.079363532   5941
## 3 -0.2055446 0.170565420   3795
## 4 -0.2055446 0.170565420   3795
## 5 -0.2055446 0.170565420   3795
## 6 -0.3805358 0.009084622  18516
length(unique(biomin_mod$gene_id))
## [1] 65
plyr::count(biomin_mod, "moduleColor")
##    moduleColor freq
## 1        black    3
## 2         blue   36
## 3        brown   17
## 4         cyan    2
## 5        green    3
## 6      magenta    1
## 7         pink    7
## 8          red   18
## 9       salmon    6
## 10         tan    3
## 11   turquoise   21
## 12      yellow   10

GO term enrichment for biomineralization genes (65 unique genes, but only 22 have GO term annotation)

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)
ID.vector_biomin <- biomin_mod %>%
  pull(gene_id) %>% unique()

##Get a list of GO Terms for each module
GO.terms_biomin <- biomin_mod %>%
  dplyr::select(GOs,gene_id) %>% unique() %>% rename(GOs = "GO.terms")

dim(GO.terms_biomin)
## [1] 65  2
dim(GO.terms_biomin %>% filter(is.na(GO.terms)))
## [1] 43  2
dim(GO.terms_biomin %>% filter(!is.na(GO.terms)))
## [1] 22  2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms_biomin$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms_biomin$gene_id, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene_id", "GO.terms")
GO.terms_biomin <- split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector = as.integer(ALL.vector %in% ID.vector_biomin) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector_biomin, gene2cat=GO.terms_biomin, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
head(GO)
##         GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 102 GO:0003674                       0                        1         20
## 108 GO:0003824                       0                        1         10
## 126 GO:0005488                       0                        1         14
## 130 GO:0005515                       0                        1         12
## 133 GO:0005575                       0                        1         21
## 136 GO:0005622                       0                        1         15
##     numInCat                               term ontology
## 102       20                 molecular_function       MF
## 108       10                 catalytic activity       MF
## 126       14                            binding       MF
## 130       12                    protein binding       MF
## 133       21                 cellular_component       CC
## 136       15 intracellular anatomical structure       CC
#adjust p-values 
GO$bh_adjust <-  p.adjust(GO$over_represented_pvalue, method="BH") #add adjusted p-values
#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(bh_adjust<0.05) %>%
        dplyr::arrange(., ontology, bh_adjust)
   
#Filtering for p < 0.00001
# GO_00001 <- GO %>%
#         dplyr::filter(bh_adjust<0.00001) %>%
#         dplyr::arrange(., ontology, bh_adjust)
   
#Write file of results 
#write.csv(GO_00001, file = "../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
go_results <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
head(go_results)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0006807                       0                        1         11
## 2 2 GO:0006810                       0                        1          7
## 3 3 GO:0006811                       0                        1          7
## 4 4 GO:0006873                       0                        1          6
## 5 5 GO:0006996                       0                        1          6
## 6 6 GO:0007154                       0                        1          9
##   numInCat                                     term ontology bh_adjust
## 1       11      nitrogen compound metabolic process       BP         0
## 2        7                                transport       BP         0
## 3        7                 monoatomic ion transport       BP         0
## 4        6 intracellular monoatomic ion homeostasis       BP         0
## 5        6                   organelle organization       BP         0
## 6        9                       cell communication       BP         0
go_results_BP <- go_results %>%
      filter(ontology=="BP") %>%
      filter(bh_adjust != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., bh_adjust)

dim(go_results_BP)
## [1] 1083    9
head(go_results_BP)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0006807                       0                        1         11
## 2 2 GO:0006810                       0                        1          7
## 3 3 GO:0006811                       0                        1          7
## 4 4 GO:0006873                       0                        1          6
## 5 5 GO:0006996                       0                        1          6
## 6 6 GO:0007154                       0                        1          9
##   numInCat                                     term ontology bh_adjust
## 1       11      nitrogen compound metabolic process       BP         0
## 2        7                                transport       BP         0
## 3        7                 monoatomic ion transport       BP         0
## 4        6 intracellular monoatomic ion homeostasis       BP         0
## 5        6                   organelle organization       BP         0
## 6        9                       cell communication       BP         0

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/biomin_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
dev.off()
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## 
## preparing gene to GO mapping data...
## preparing IC data...
 #calculate similarity 
scores <- setNames(-log(go_results_BP$bh_adjust), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 768  10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_reduced$ParentTerm))
## [1] 91

The reduced list of terms is 180 terms that falls under 32 parent terms.

#write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/Biomineralization_goterms.csv")
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")

Can further reduce like so:

Further reduce by the following keywords

#add vector for terms of interest to reduce number of GO terms - NOT using this to look at individual modules for exploratory purposes
keywords<-c("metabolism", "carbon","bicarbonate", "apoptosis", "death", "symbiosis", "regulation of cell communication", "trans membrane transport", "transmembrane",  "organic substance transport", "inorganic substance transport","response to stress", "antioxidant", "calcification","biomineralization", "heat","transporters","proton transport","ion transport","acid-base", "homeostasis")

Search for these keywords in the parent terms

go_results_BP_reduced <- go_results_BP_reduced %>%
   filter(grepl(paste(keywords, collapse="|"), ParentTerm))

This is only 17 terms

#write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/Biomineralization_goterms.csv")
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered_keywords.csv")
#plot significantly enriched GO terms by Slim Category faceted by slim term 
 GO.plot_biomin <-  ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) + 
    geom_point(aes(size=bh_adjust)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
GO.plot_biomin

Combining with Calcification Up and Down module info to make figure 4

Reduce the lists together

GO_00001_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
GO_00001_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")
GO_00001_biomin <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_00001_up %>%
      filter(ontology=="BP")%>%
      filter(bh_adjust != "NA") %>%
      filter(numInCat>10)%>%
      arrange(., bh_adjust)
dim(go_results_BP_up)
## [1] 1976    8
go_results_BP_down <- GO_00001_down %>%
      filter(ontology=="BP")%>%
      filter(bh_adjust != "NA") %>%
      filter(numInCat>10)%>%
      arrange(., bh_adjust)
dim(go_results_BP_down)
## [1] 1822    8
go_results_BP_biomin <- GO_00001_biomin %>%
      filter(ontology=="BP")%>%
      filter(bh_adjust != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., bh_adjust)
dim(go_results_BP_biomin)
## [1] 1083    8
all_enriched_terms_up_down_biomin <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm,go_results_BP_biomin$GOterm)
length(all_enriched_terms_up_down_biomin)
## [1] 4881
length(unique(all_enriched_terms_up_down_biomin))
## [1] 2465

By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules or the biomineralization genes.

library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down_biomin),
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity 
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores = "size",
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 1941   10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_up_reduced$GOterm))
## [1] 1682
length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 134
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_down_reduced$GOterm))
## [1] 1589
length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 137
#keep only the goterms from the reduced list
go_results_BP_biomin_reduced <- go_results_BP_biomin %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_biomin_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_biomin_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_biomin_reduced$GOterm))
## [1] 768
length(unique(go_results_BP_biomin_reduced$ParentTerm))
## [1] 107

Further reduce by the following keywords

#add vector for terms of interest to reduce number of GO terms - NOT using this to look at individual modules for exploratory purposes
keywords<-c("metabolism", "carbon","bicarbonate", "apoptosis", "death", "symbiosis", "regulation of cell communication", "trans membrane transport", "transmembrane",  "organic substance transport", "inorganic substance transport","response to stress", "antioxidant", "calcification","biomineralization", "heat","transporters","proton transport","ion transport","acid-base", "homeostasis")

Search for these keywords in the parent terms

go_results_BP_up_reduced_keywords <- go_results_BP_up_reduced %>%
   filter(grepl(paste(keywords, collapse="|"), ParentTerm))

length(unique(go_results_BP_up_reduced_keywords$GOterm))
## [1] 84
length(unique(go_results_BP_up_reduced_keywords$ParentTerm))
## [1] 6
go_results_BP_down_reduced_keywords <- go_results_BP_down_reduced %>%
   filter(grepl(paste(keywords, collapse="|"), ParentTerm))

length(unique(go_results_BP_down_reduced_keywords$GOterm))
## [1] 72
length(unique(go_results_BP_down_reduced_keywords$ParentTerm))
## [1] 6
go_results_BP_biomin_reduced_keywords <- go_results_BP_biomin_reduced %>%
   filter(grepl(paste(keywords, collapse="|"), ParentTerm))

length(unique(go_results_BP_biomin_reduced_keywords$GOterm))
## [1] 61
length(unique(go_results_BP_biomin_reduced_keywords$ParentTerm))
## [1] 6
#write.csv(go_results_BP_up_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#write.csv(go_results_BP_biomin_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")

Won’t move forward with these for now, but they are there if we need further reduced lists.

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced

cal_up_terms <- cal_up_terms %>%
  mutate(Factor = "Up")

cal_up_terms_parent_nterm <- cal_up_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
  mutate(Calcification.direction = "Up")  %>% ungroup()

head(cal_up_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0006807                       0                        1       1215
## 2 GO:0007275                       0                        1        978
## 3 GO:0008152                       0                        1       1387
## 4 GO:0009987                       0                        1       1938
## 5 GO:0016043                       0                        1       1049
## 6 GO:0019222                       0                        1        982
##   numInCat                                term ontology bh_adjust
## 1     1215 nitrogen compound metabolic process       BP         0
## 2      978  multicellular organism development       BP         0
## 3     1387                   metabolic process       BP         0
## 4     1938                    cellular process       BP         0
## 5     1049     cellular component organization       BP         0
## 6      982     regulation of metabolic process       BP         0
##                                  ParentTerm Factor
## 1                         metabolic process     Up
## 2                      cell differentiation     Up
## 3                         metabolic process     Up
## 4                          cellular process     Up
## 5                mitochondrion organization     Up
## 6 regulation of DNA-templated transcription     Up

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")

cal_down_terms_parent_nterm <- cal_down_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
  mutate(Calcification.direction = "Down")  %>% ungroup()

head(cal_down_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0006807                       0                        1        910
## 2 GO:0007275                       0                        1        610
## 3 GO:0008152                       0                        1        998
## 4 GO:0009987                       0                        1       1277
## 5 GO:0016043                       0                        1        652
## 6 GO:0019222                       0                        1        599
##   numInCat                                term ontology bh_adjust
## 1      910 nitrogen compound metabolic process       BP         0
## 2      610  multicellular organism development       BP         0
## 3      998                   metabolic process       BP         0
## 4     1277                    cellular process       BP         0
## 5      652     cellular component organization       BP         0
## 6      599     regulation of metabolic process       BP         0
##                                  ParentTerm Factor
## 1                         metabolic process   Down
## 2                      cell differentiation   Down
## 3                         metabolic process   Down
## 4                          cellular process   Down
## 5                mitochondrion organization   Down
## 6 regulation of DNA-templated transcription   Down

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the list of biomineralization-related genes. The list has been further reduced by using the package rrvgo.

#cal_biomin_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
cal_biomin_terms <- go_results_BP_biomin_reduced
cal_biomin_terms <- cal_biomin_terms %>% mutate(Factor = "Biomin")
head(cal_biomin_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0006807                       0                        1         11
## 2 GO:0006810                       0                        1          7
## 3 GO:0006811                       0                        1          7
## 4 GO:0006873                       0                        1          6
## 5 GO:0006996                       0                        1          6
## 6 GO:0007154                       0                        1          9
##   numInCat                                     term ontology bh_adjust
## 1       11      nitrogen compound metabolic process       BP         0
## 2        7                                transport       BP         0
## 3        7                 monoatomic ion transport       BP         0
## 4        6 intracellular monoatomic ion homeostasis       BP         0
## 5        6                   organelle organization       BP         0
## 6        9                       cell communication       BP         0
##                                 ParentTerm Factor
## 1                        metabolic process Biomin
## 2                                transport Biomin
## 3                 monoatomic ion transport Biomin
## 4    intracellular calcium ion homeostasis Biomin
## 5               mitochondrion organization Biomin
## 6 intracellular receptor signaling pathway Biomin

Merge up and down-regulation of calcification GOterms along with GOterms enriched in the biomineralization gene list

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.0001) within the modules that were upregulated or downregulated for calcification or in the biomineralization gene list. The list has been further reduced by using the package rrvgo.

all_terms <- rbind(cal_down_terms,cal_up_terms,cal_biomin_terms)

all_terms <-  all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","bh_adjust","ParentTerm")]

all_terms$GOterm<-as.factor(all_terms$GOterm)

dim(all_terms) #3453 reduced terms 
## [1] 4039   10
length(unique(all_terms$GOterm)) #this represents 1817 unique terms between the three lists of reduced terms
## [1] 1941
length(unique(all_terms$ParentTerm)) #this represents 136 unique terms between the three lists of reduced terms
## [1] 143

Unique terms

This is collapsing the list from 3273 rows to only 1809, representing the 1809 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.

goterms_shared <- all_terms %>%
  group_by(GOterm) %>%
  dplyr::summarise(
    ParentTerm = paste(unique(ParentTerm), collapse = ", "),
    Factor = paste(unique(Factor), collapse = ", "),
    term = paste(unique(term), collapse = ", ")
  )
dim(goterms_shared)
## [1] 1941    4
goterms_shared_full <- goterms_shared %>%
  left_join(dplyr::select(all_terms,GOterm, Factor, bh_adjust), by = "GOterm") %>% #select 3 columns GOterm, Factor, bh_adjust from all_terms and left join by GOterm, turns this dataframe from 1817 rows back to the 3453 in all_terms
  mutate(bh_adjust_Up = ifelse(Factor.y == "Up", bh_adjust, NA)) %>% #add a column that is the p-value for the Up factor
  mutate(bh_adjust_Down = ifelse(Factor.y == "Down", bh_adjust, NA)) %>% #add a column that is the p-value for the Down factor
  mutate(bh_adjust_Biomin = ifelse(Factor.y == "Biomin", bh_adjust, NA)) %>% #add a column that is the p-value for the Biomin factor
  dplyr::select(-c("bh_adjust", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
  group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
  dplyr::summarize(bh_adjust_Up = na.omit(bh_adjust_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
            bh_adjust_Down = na.omit(bh_adjust_Down)[1], #carry over the p-value for the term in the down direction, by taking the first non-NA value.
            bh_adjust_Biomin = na.omit(bh_adjust_Biomin)[1]) %>% #carry over the p-value for the term in the biomin list, by taking the first non-NA value.
  rename(Factor.x = "Factor") #rename column 
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm_with_Biomin.csv")

calculations to check before plots

goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()

goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Biomin")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")
goterms_shared_only_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up, Biomin" | Factor=="Up, Biomin" | Factor=="Down, Biomin")

nrow(goterms_up)
## [1] 199
nrow(goterms_down)
## [1] 118
nrow(goterms_biomin)
## [1] 132
nrow(goterms_shared_only)
## [1] 856
nrow(goterms_shared_only_biomin)
## [1] 636
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_biomin) + nrow(goterms_shared_only) + nrow(goterms_shared_only_biomin) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 70
length(unique(goterms_down$ParentTerm))
## [1] 54
length(unique(goterms_biomin$ParentTerm))
## [1] 44
length(unique(goterms_shared_only$ParentTerm))
## [1] 114
length(unique(goterms_shared_only_biomin$ParentTerm))
## [1] 101

Plot of parent terms SHARED by Up and Down modules

For each parent term in this list, how many are “Up” by calcification and “Down” by calcification, whether or not they are also enriched in the biomineralization gene list

SharedGOterms = # of GO terms within the parent term that are in the list for each of the different factors

result_parent_unique <- goterms_shared %>%
  group_by(ParentTerm,Factor) %>%
  dplyr::summarise(SharedGOterms = n_distinct(GOterm)) %>%
  arrange(-SharedGOterms)
## `summarise()` has grouped output by 'ParentTerm'. You can override using the
## `.groups` argument.
unique(result_parent_unique$Factor)
## [1] "Down, Up"         "Down, Up, Biomin" "Down"             "Up"              
## [5] "Biomin"           "Up, Biomin"       "Down, Biomin"

For SHARED parent terms (ones that are in Up AND Down, whether or not they are in Biomin)

parents_shared <- result_parent_unique %>%
  dplyr::filter(Factor == "Down, Up, Biomin" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 

Calculate the percentage of shared GOterms with Biomin. for upregulation of calcification

merged_shared <- parents_shared %>%
  dplyr::group_by(ParentTerm) %>%
  mutate(
    Has_Biomin = any(Factor == "Down, Up, Biomin"),
    Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = ifelse(
      Factor %in% c("Down, Up, Biomin"),
      SharedGOterms / Sum_of_SharedGOterms,
      0
    )
  ) %>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
  filter((Has_Biomin == TRUE & Factor == "Down, Up, Biomin") | (Has_Biomin == FALSE & Factor == "Down, Up"))

merged_shared_clean <- na.omit(merged_shared) #remove rows for the non-"Down, Up, Biomin" rows that don't have a percentage
head(merged_shared_clean)
## # A tibble: 6 × 7
## # Groups:   ParentTerm [6]
##   ParentTerm                Factor SharedGOterms Has_Biomin Sum_of_SharedGOterms
##   <chr>                     <chr>          <int> <lgl>                     <int>
## 1 IRE1-mediated unfolded p… Down,…            27 TRUE                         55
## 2 mRNA processing           Down,…            27 FALSE                        27
## 3 anatomical structure mor… Down,…            23 TRUE                         26
## 4 regulation of DNA-templa… Down,…            23 TRUE                         51
## 5 nervous system developme… Down,…            19 TRUE                         25
## 6 G protein-coupled recept… Down,…            17 TRUE                         28
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Frequency >10

cal_freq_terms_filtered_shared <- merged_shared_clean %>%
  filter(Sum_of_SharedGOterms>=10) 
head(cal_freq_terms_filtered_shared)
## # A tibble: 6 × 7
## # Groups:   ParentTerm [6]
##   ParentTerm                Factor SharedGOterms Has_Biomin Sum_of_SharedGOterms
##   <chr>                     <chr>          <int> <lgl>                     <int>
## 1 IRE1-mediated unfolded p… Down,…            27 TRUE                         55
## 2 mRNA processing           Down,…            27 FALSE                        27
## 3 anatomical structure mor… Down,…            23 TRUE                         26
## 4 regulation of DNA-templa… Down,…            23 TRUE                         51
## 5 nervous system developme… Down,…            19 TRUE                         25
## 6 G protein-coupled recept… Down,…            17 TRUE                         28
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_shared, aes(y=Sum_of_SharedGOterms,x=reorder(ParentTerm, Sum_of_SharedGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_SharedGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  scale_fill_gradientn(colours=c("white","#E6E6FA","#C9A0DC","#9370DB","#663399"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_colored_sharedUpDown.pdf", plot=freq_fig_up, dpi=300, height=15, width=10, units="in", limitsize=FALSE)

Frequency >20

cal_freq_terms_filtered_shared <- merged_shared_clean %>%
  filter(Sum_of_SharedGOterms>=20) 
head(cal_freq_terms_filtered_shared)
## # A tibble: 6 × 7
## # Groups:   ParentTerm [6]
##   ParentTerm                Factor SharedGOterms Has_Biomin Sum_of_SharedGOterms
##   <chr>                     <chr>          <int> <lgl>                     <int>
## 1 IRE1-mediated unfolded p… Down,…            27 TRUE                         55
## 2 mRNA processing           Down,…            27 FALSE                        27
## 3 anatomical structure mor… Down,…            23 TRUE                         26
## 4 regulation of DNA-templa… Down,…            23 TRUE                         51
## 5 nervous system developme… Down,…            19 TRUE                         25
## 6 G protein-coupled recept… Down,…            17 TRUE                         28
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_shared, aes(y=Sum_of_SharedGOterms,x=reorder(ParentTerm, Sum_of_SharedGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_SharedGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  scale_fill_gradientn(colours=c("white","#E6E6FA","#C9A0DC","#9370DB","#663399"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_colored_sharedUpDown_gr20.pdf", plot=freq_fig_up, dpi=300, height=7, width=10, units="in", limitsize=FALSE)

Percentage shared >20

cal_freq_terms_filtered_shared <- merged_shared_clean %>%
  filter(Percentage.of.shared.GO.terms.with.biomineralization.genes>80) 
head(cal_freq_terms_filtered_shared)
## # A tibble: 6 × 7
## # Groups:   ParentTerm [6]
##   ParentTerm                Factor SharedGOterms Has_Biomin Sum_of_SharedGOterms
##   <chr>                     <chr>          <int> <lgl>                     <int>
## 1 anatomical structure mor… Down,…            23 TRUE                         26
## 2 negative regulation of t… Down,…            16 TRUE                         17
## 3 morphogenesis of an epit… Down,…            14 TRUE                         15
## 4 metabolic process         Down,…            13 TRUE                         15
## 5 locomotion                Down,…            10 TRUE                         11
## 6 regulation of multicellu… Down,…            10 TRUE                         12
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_shared, aes(y=Sum_of_SharedGOterms,x=reorder(ParentTerm, Percentage.of.shared.GO.terms.with.biomineralization.genes), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_SharedGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  scale_fill_gradientn(colours=c("white","#E6E6FA","#C9A0DC","#9370DB","#663399"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_colored_sharedUpDown_80percentshared.pdf", plot=freq_fig_up, dpi=300, height=10, width=10, units="in", limitsize=FALSE)
freq_fig_up<-ggplot(cal_freq_terms_filtered_shared, aes(y=Sum_of_SharedGOterms,x=reorder(ParentTerm, Sum_of_SharedGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_SharedGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  scale_fill_gradientn(colours=c("white","#E6E6FA","#C9A0DC","#9370DB","#663399"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_colored_sharedUpDown_80percentsharedordered.pdf", plot=freq_fig_up, dpi=300, height=10, width=10, units="in", limitsize=FALSE)

Plot of parent terms UNIQUE for Up and Down modules

For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)

For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)

#parent_filtered_up <- result_parent_unique %>%
#  dplyr::filter(Factor == "Up" | Factor=="Up, Biomin")
# #dplyr::filter(SharedGOterms>=5) 

parent_filtered_up <- result_parent_unique %>%
  dplyr::filter(Factor=="Up, Biomin")
 #dplyr::filter(SharedGOterms>=5) 
#parent_filtered_down <- result_parent_unique %>%
#  dplyr::filter(Factor=="Down" | Factor=="Down, Biomin")
# #dplyr::filter(SharedGOterms>=5) 

parent_filtered_down <- result_parent_unique %>%
  dplyr::filter(Factor=="Down, Biomin")
 #dplyr::filter(SharedGOterms>=5) 

Calculate the percentage of unique UP GO parent terms with Biomin. for upregulation of calcification

merged_up <- parent_filtered_up %>%
  dplyr::group_by(ParentTerm) %>%
  left_join(cal_up_terms_parent_nterm, by = "ParentTerm") %>%
  rename(SharedGOterms = "UniqueGOTerms") %>%
  mutate(
    Has_Biomin = any(Factor == "Up, Biomin"),
    Sum_of_UniqueGOterms = sum(UniqueGOTerms, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = ifelse(
      Factor %in% c("Up, Biomin"),
      UniqueGOTerms / Sum_of_UniqueGOterms,
      0
    )
  ) %>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
  filter((Has_Biomin == TRUE & Factor == "Up, Biomin") | (Has_Biomin == FALSE & Factor == "Up"))

merged_up_clean <- na.omit(merged_up) #remove rows for the non-"Up, Biomin" rows that don't have a percentage
merged_up_clean
## # A tibble: 14 × 9
## # Groups:   ParentTerm [14]
##    ParentTerm        Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##    <chr>             <chr>          <int>           <int> <chr>                 
##  1 cellular oxidant… Up, B…             2              12 Up                    
##  2 cilium assembly   Up, B…             2              27 Up                    
##  3 fatty acid biosy… Up, B…             2              33 Up                    
##  4 hydrogen peroxid… Up, B…             2               3 Up                    
##  5 morphogenesis of… Up, B…             2              19 Up                    
##  6 nervous system d… Up, B…             2              31 Up                    
##  7 synapse organiza… Up, B…             2              15 Up                    
##  8 actin cytoskelet… Up, B…             1              12 Up                    
##  9 embryo developme… Up, B…             1               8 Up                    
## 10 intracellular ca… Up, B…             1              24 Up                    
## 11 nuclear migration Up, B…             1              16 Up                    
## 12 regulation of ch… Up, B…             1              12 Up                    
## 13 regulation of me… Up, B…             1              26 Up                    
## 14 sensory percepti… Up, B…             1              18 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Calculate the percentage of shared GOterms downregulation of calcification

merged_down <- parent_filtered_down %>%
  dplyr::group_by(ParentTerm) %>%
  left_join(cal_down_terms_parent_nterm, by = "ParentTerm") %>%
  rename(SharedGOterms = "UniqueGOTerms") %>%
  mutate(
    Has_Biomin = any(Factor == "Down, Biomin"),
    Sum_of_UniqueGOterms = sum(UniqueGOTerms, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = ifelse(
      Factor %in% c("Down, Biomin"),
      UniqueGOTerms / Sum_of_UniqueGOterms,
      0
    )
  ) %>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
  filter((Has_Biomin == TRUE & Factor == "Down, Biomin") | (Has_Biomin == FALSE & Factor == "Down"))

merged_down_clean <- na.omit(merged_down) #remove rows for the non-"Down, Biomin" rows that don't have a percentage
merged_down_clean
## # A tibble: 8 × 9
## # Groups:   ParentTerm [8]
##   ParentTerm         Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##   <chr>              <chr>          <int>           <int> <chr>                 
## 1 protein ubiquitin… Down,…             2              17 Down                  
## 2 apoptotic process  Down,…             1              25 Down                  
## 3 detection of stim… Down,…             1               4 Down                  
## 4 mitochondrion org… Down,…             1              28 Down                  
## 5 muscle organ deve… Down,…             1              14 Down                  
## 6 regulation of lip… Down,…             1               5 Down                  
## 7 sensory perceptio… Down,…             1              19 Down                  
## 8 transmembrane tra… Down,…             1               7 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Frequency >10 up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  #filter(Number.of.terms>=10) %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 14 × 9
## # Groups:   ParentTerm [14]
##    ParentTerm        Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##    <chr>             <chr>          <int>           <int> <chr>                 
##  1 cellular oxidant… Up, B…             2              12 Up                    
##  2 cilium assembly   Up, B…             2              27 Up                    
##  3 fatty acid biosy… Up, B…             2              33 Up                    
##  4 hydrogen peroxid… Up, B…             2               3 Up                    
##  5 morphogenesis of… Up, B…             2              19 Up                    
##  6 nervous system d… Up, B…             2              31 Up                    
##  7 synapse organiza… Up, B…             2              15 Up                    
##  8 actin cytoskelet… Up, B…             1              12 Up                    
##  9 embryo developme… Up, B…             1               8 Up                    
## 10 intracellular ca… Up, B…             1              24 Up                    
## 11 nuclear migration Up, B…             1              16 Up                    
## 12 regulation of ch… Up, B…             1              12 Up                    
## 13 regulation of me… Up, B…             1              26 Up                    
## 14 sensory percepti… Up, B…             1              18 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,15))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12)) #making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_colored_unique_biomin.pdf", plot=freq_fig_up, dpi=300, height=5, width=10, units="in", limitsize=FALSE)

Frequency >10 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  #filter(Number.of.terms>=10) %>%
  filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 8 × 9
## # Groups:   ParentTerm [8]
##   ParentTerm         Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##   <chr>              <chr>          <int>           <int> <chr>                 
## 1 protein ubiquitin… Down,…             2              17 Down                  
## 2 apoptotic process  Down,…             1              25 Down                  
## 3 detection of stim… Down,…             1               4 Down                  
## 4 mitochondrion org… Down,…             1              28 Down                  
## 5 muscle organ deve… Down,…             1              14 Down                  
## 6 regulation of lip… Down,…             1               5 Down                  
## 7 sensory perceptio… Down,…             1              19 Down                  
## 8 transmembrane tra… Down,…             1               7 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,20))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_colored_unique_biomin.pdf", plot=freq_fig_down, dpi=300, height=5, width=10, units="in", limitsize=FALSE)
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs

Using Kristen’s code exactly

Merge the frequency of GOterms for both up- and down-regulation of calcification

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.

cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")

cal_up_terms <- cal_up_terms %>%
  mutate(Factor = "Up")

cal_up_terms_parent_nterm <- cal_up_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
  mutate(Calcification.direction = "Up")  %>% ungroup()

head(cal_up_terms)
##   X.1 X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1   1 1 GO:0006807                       0                        1       1215
## 2   2 2 GO:0007275                       0                        1        978
## 3   3 4 GO:0008152                       0                        1       1387
## 4   4 5 GO:0009987                       0                        1       1938
## 5   5 6 GO:0016043                       0                        1       1049
## 6   6 7 GO:0019222                       0                        1        982
##   numInCat                                term ontology bh_adjust
## 1     1215 nitrogen compound metabolic process       BP         0
## 2      978  multicellular organism development       BP         0
## 3     1387                   metabolic process       BP         0
## 4     1938                    cellular process       BP         0
## 5     1049     cellular component organization       BP         0
## 6      982     regulation of metabolic process       BP         0
##                                      ParentTerm Factor
## 1                             metabolic process     Up
## 2                         developmental process     Up
## 3                             metabolic process     Up
## 4                              cellular process     Up
## 5 cellular component organization or biogenesis     Up
## 6               regulation of metabolic process     Up

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.

cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")

cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")

cal_down_terms_parent_nterm <- cal_down_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
  mutate(Calcification.direction = "Down")  %>% ungroup()

head(cal_down_terms)
##   X.1 X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1   1 1 GO:0006807                       0                        1        910
## 2   2 2 GO:0007275                       0                        1        610
## 3   3 4 GO:0008152                       0                        1        998
## 4   4 5 GO:0009987                       0                        1       1277
## 5   5 6 GO:0016043                       0                        1        652
## 6   6 7 GO:0019222                       0                        1        599
##   numInCat                                term ontology bh_adjust
## 1      910 nitrogen compound metabolic process       BP         0
## 2      610  multicellular organism development       BP         0
## 3      998                   metabolic process       BP         0
## 4     1277                    cellular process       BP         0
## 5      652     cellular component organization       BP         0
## 6      599     regulation of metabolic process       BP         0
##                                      ParentTerm Factor
## 1                             metabolic process   Down
## 2                         developmental process   Down
## 3                             metabolic process   Down
## 4                              cellular process   Down
## 5 cellular component organization or biogenesis   Down
## 6               regulation of metabolic process   Down
cal_freq_terms<- merge(cal_up_terms_parent_nterm,cal_down_terms_parent_nterm, by=c("ParentTerm","Number.of.terms","Calcification.direction"),all=T)
cal_freq_terms
##                                                                    ParentTerm
## 1                                                actin filament-based process
## 2                                                actin filament-based process
## 3                                                                       aging
## 4                                                                       aging
## 5                                                     amide metabolic process
## 6                                                     amide metabolic process
## 7                                              ammonium ion metabolic process
## 8                                              ammonium ion metabolic process
## 9                                          anatomical structure morphogenesis
## 10                                         anatomical structure morphogenesis
## 11                                                   animal organ development
## 12                                                   animal organ development
## 13                                                                   behavior
## 14                                                                   behavior
## 15  biological process involved in interspecies interaction between organisms
## 16                       biological process involved in symbiotic interaction
## 17                                                       biosynthetic process
## 18                                                       biosynthetic process
## 19                                  carbohydrate derivative metabolic process
## 20                                  carbohydrate derivative metabolic process
## 21                                             carbohydrate metabolic process
## 22                                             carbohydrate metabolic process
## 23                                                          catabolic process
## 24                                                          catabolic process
## 25                                                            cell activation
## 26                                                            cell activation
## 27                                                         cell communication
## 28                                                         cell communication
## 29                                                                 cell cycle
## 30                                                                 cell cycle
## 31                                                              cell division
## 32                                                              cell division
## 33                                                       cell fate commitment
## 34                                                       cell fate commitment
## 35                                                              cell motility
## 36                                                              cell motility
## 37                                               cell projection organization
## 38                                               cell projection organization
## 39                                                           cell recognition
## 40                                                           cell recognition
## 41                                    cell surface receptor signaling pathway
## 42                                                        cell-cell signaling
## 43                                        cellular aldehyde metabolic process
## 44                                              cellular component biogenesis
## 45                                              cellular component biogenesis
## 46                                             cellular component disassembly
## 47                              cellular component organization or biogenesis
## 48                              cellular component organization or biogenesis
## 49                                                      cellular localization
## 50                                                      cellular localization
## 51                                   cellular macromolecule metabolic process
## 52                                   cellular macromolecule metabolic process
## 53                             cellular modified amino acid metabolic process
## 54                             cellular modified amino acid metabolic process
## 55                                                           cellular process
## 56                                                           cellular process
## 57                                cellular response to environmental stimulus
## 58                                cellular response to environmental stimulus
## 59                                                     chromatin organization
## 60                                                     chromatin organization
## 61                                                                coagulation
## 62                                                                coagulation
## 63                                                     cytoplasm organization
## 64                                                     cytoplasm organization
## 65                                                           defense response
## 66                                                      detection of stimulus
## 67                                                   developmental maturation
## 68                                                   developmental maturation
## 69                                                      developmental process
## 70                                                      developmental process
## 71                                                      DNA metabolic process
## 72                                                         embryo development
## 73                                           endomembrane system organization
## 74                              establishment or maintenance of cell polarity
## 75                              establishment or maintenance of cell polarity
## 76                                       extracellular structure organization
## 77                             generation of precursor metabolites and energy
## 78                             generation of precursor metabolites and energy
## 79                                                              glycosylation
## 80                                                              glycosylation
## 81                                                           head development
## 82                                                           head development
## 83                                                                hemopoiesis
## 84                                                                hemopoiesis
## 85                                                        homeostatic process
## 86                                                        homeostatic process
## 87                                                      immune system process
## 88                                                      immune system process
## 89                                                           import into cell
## 90                                                           import into cell
## 91                                          intracellular signal transduction
## 92                                                    lipid metabolic process
## 93                                                    lipid metabolic process
## 94                                                               localization
## 95                                                               localization
## 96                                                                 locomotion
## 97                                                                 locomotion
## 98                                                 macromolecule localization
## 99                                                 macromolecule localization
## 100                                                macromolecule modification
## 101                                                macromolecule modification
## 102                                                maintenance of cell number
## 103                                                maintenance of cell number
## 104                                                   maintenance of location
## 105                                                   maintenance of location
## 106                                                     membrane organization
## 107                                                     membrane organization
## 108                                                         metabolic process
## 109                                                         metabolic process
## 110                                                               methylation
## 111                                                               methylation
## 112                                              microtubule bundle formation
## 113                                                 microtubule-based process
## 114                                                 microtubule-based process
## 115                                               microtubule-based transport
## 116                                               microtubule-based transport
## 117                                             mitochondrial gene expression
## 118                                             mitochondrial gene expression
## 119                                                             molting cycle
## 120                                                             molting cycle
## 121                                                  monoatomic ion transport
## 122                                                  monoatomic ion transport
## 123                                      multi-multicellular organism process
## 124                                      multi-multicellular organism process
## 125                                          multicellular organismal process
## 126                                          multicellular organismal process
## 127                                              muscle structure development
## 128                                              muscle structure development
## 129                                 negative regulation of biological process
## 130                                 negative regulation of biological process
## 131                                                neurotransmitter transport
## 132                                                neurotransmitter transport
## 133                                               nitrogen compound transport
## 134                                               nitrogen compound transport
## 135                                                    organelle localization
## 136                                                    organelle localization
## 137                              organelle localization by membrane tethering
## 138                              organelle localization by membrane tethering
## 139                                 organic cyclic compound metabolic process
## 140                                 organic cyclic compound metabolic process
## 141                                organic hydroxy compound metabolic process
## 142                                organic hydroxy compound metabolic process
## 143                                               organic substance transport
## 144                                               organic substance transport
## 145                                         organophosphate metabolic process
## 146                                             pattern specification process
## 147                                             pattern specification process
## 148                                          peptidyl-amino acid modification
## 149                                          peptidyl-amino acid modification
## 150                                              phosphorus metabolic process
## 151                                              phosphorus metabolic process
## 152                                                 pigment metabolic process
## 153                                                 pigment metabolic process
## 154                                                              pigmentation
## 155                                                              pigmentation
## 156                                 positive regulation of biological process
## 157                                 positive regulation of biological process
## 158                                                post-embryonic development
## 159                                                post-embryonic development
## 160                                                           protein folding
## 161                                                           protein folding
## 162                                                        protein maturation
## 163                                   protein-containing complex localization
## 164                                   protein-containing complex localization
## 165                                   protein-containing complex organization
## 166                                   protein-containing complex organization
## 167                                                               proteolysis
## 168                                                               proteolysis
## 169                            pyridine-containing compound metabolic process
## 170                            pyridine-containing compound metabolic process
## 171                                                receptor metabolic process
## 172                                                receptor metabolic process
## 173                                                   regulation of autophagy
## 174                                                   regulation of autophagy
## 175                                          regulation of biological quality
## 176                                          regulation of biological quality
## 177                                               regulation of cell adhesion
## 178                                               regulation of cell adhesion
## 179                                          regulation of cell communication
## 180                                          regulation of cell communication
## 181                                                  regulation of cell death
## 182                                                  regulation of cell death
## 183                               regulation of cell population proliferation
## 184                               regulation of cell population proliferation
## 185                             regulation of cellular component organization
## 186                             regulation of cellular component organization
## 187                                            regulation of cellular process
## 188                                            regulation of cellular process
## 189                                         regulation of cytokine production
## 190                                       regulation of developmental process
## 191                                       regulation of developmental process
## 192                                                      regulation of growth
## 193                                                      regulation of growth
## 194                                                regulation of localization
## 195                                                regulation of localization
## 196                                           regulation of metabolic process
## 197                                           regulation of metabolic process
## 198                                          regulation of molecular function
## 199                                          regulation of molecular function
## 200                            regulation of multicellular organismal process
## 201                            regulation of multicellular organismal process
## 202                          regulation of plasma lipoprotein particle levels
## 203                   regulation of reactive oxygen species metabolic process
## 204                   regulation of reactive oxygen species metabolic process
## 205                                        regulation of response to stimulus
## 206                                        regulation of response to stimulus
## 207                                                              reproduction
## 208                                                              reproduction
## 209                                              response to abiotic stimulus
## 210                                              response to abiotic stimulus
## 211                                               response to biotic stimulus
## 212                                               response to biotic stimulus
## 213                                                      response to chemical
## 214                                                      response to chemical
## 215                                           response to endogenous stimulus
## 216                                           response to endogenous stimulus
## 217                                             response to external stimulus
## 218                                             response to external stimulus
## 219                                             response to organic substance
## 220                                                      response to stimulus
## 221                                                      response to stimulus
## 222                                                        response to stress
## 223                                                        response to stress
## 224                                           response to xenobiotic stimulus
## 225                                           response to xenobiotic stimulus
## 226                                                          rhythmic process
## 227                                                          rhythmic process
## 228                                                            RNA processing
## 229                                                            RNA processing
## 230                                               secondary metabolic process
## 231                                               secondary metabolic process
## 232                                                                 secretion
## 233                                                                 secretion
## 234                                                                 signaling
## 235                                                                 signaling
## 236                                                                     sleep
## 237                                          small molecule metabolic process
## 238                                          small molecule metabolic process
## 239                                         sulfur compound metabolic process
## 240                                         sulfur compound metabolic process
## 241                                                      synapse organization
## 242                                                      synapse organization
## 243                                                        system development
## 244                                                        system development
## 245                                                            system process
## 246                                                            system process
## 247                                                        tissue development
## 248                                                        tissue development
## 249                                                          tissue migration
## 250                                                          tissue migration
## 251                                                          tube development
## 252                                                          tube development
## 253                                                      vesicle organization
## 254                                                vesicle-mediated transport
## 255                                                vesicle-mediated transport
## 256                                                             viral process
## 257                                                             viral process
##     Number.of.terms Calcification.direction
## 1                10                    Down
## 2                12                      Up
## 3                 2                    Down
## 4                 2                      Up
## 5                12                    Down
## 6                12                      Up
## 7                 1                    Down
## 8                 1                      Up
## 9                23                    Down
## 10               28                      Up
## 11               16                    Down
## 12               16                      Up
## 13                8                    Down
## 14                8                      Up
## 15                6                    Down
## 16                5                      Up
## 17               20                      Up
## 18               24                    Down
## 19               17                    Down
## 20               22                      Up
## 21               10                      Up
## 22               12                    Down
## 23               36                      Up
## 24               39                    Down
## 25               13                    Down
## 26               13                      Up
## 27                2                    Down
## 28               16                      Up
## 29               41                    Down
## 30               50                      Up
## 31                7                    Down
## 32               10                      Up
## 33                8                    Down
## 34                8                      Up
## 35                8                    Down
## 36                9                      Up
## 37               20                    Down
## 38               27                      Up
## 39                2                    Down
## 40                2                      Up
## 41               22                      Up
## 42               19                    Down
## 43                1                    Down
## 44               18                      Up
## 45               24                    Down
## 46                5                      Up
## 47               26                    Down
## 48               31                      Up
## 49               24                    Down
## 50               31                      Up
## 51                8                      Up
## 52               21                    Down
## 53                1                      Up
## 54                2                    Down
## 55                1                    Down
## 56                1                      Up
## 57                5                    Down
## 58                6                      Up
## 59                6                    Down
## 60                6                      Up
## 61                2                    Down
## 62                3                      Up
## 63                1                    Down
## 64                1                      Up
## 65               12                      Up
## 66                4                      Up
## 67                6                    Down
## 68                9                      Up
## 69               10                      Up
## 70               12                    Down
## 71               16                      Up
## 72                8                      Up
## 73                4                    Down
## 74                5                    Down
## 75                6                      Up
## 76                3                      Up
## 77                8                      Up
## 78               11                    Down
## 79                4                    Down
## 80                4                      Up
## 81                2                    Down
## 82                2                      Up
## 83                4                    Down
## 84                6                      Up
## 85               22                    Down
## 86               24                      Up
## 87               23                    Down
## 88               25                      Up
## 89                1                    Down
## 90                1                      Up
## 91               32                      Up
## 92               16                    Down
## 93               28                      Up
## 94               10                      Up
## 95               11                    Down
## 96                7                    Down
## 97               12                      Up
## 98               26                      Up
## 99               28                    Down
## 100              14                      Up
## 101              17                    Down
## 102               3                    Down
## 103               3                      Up
## 104               4                      Up
## 105               5                    Down
## 106               6                    Down
## 107              10                      Up
## 108              15                    Down
## 109              15                      Up
## 110               2                    Down
## 111               2                      Up
## 112               4                    Down
## 113              11                    Down
## 114              17                      Up
## 115               3                    Down
## 116               3                      Up
## 117               2                    Down
## 118               2                      Up
## 119               2                    Down
## 120               2                      Up
## 121               8                    Down
## 122              10                      Up
## 123               1                    Down
## 124               1                      Up
## 125               2                    Down
## 126               2                      Up
## 127              11                      Up
## 128              14                    Down
## 129              17                      Up
## 130              18                    Down
## 131               2                      Up
## 132               3                    Down
## 133              10                      Up
## 134              12                    Down
## 135               9                    Down
## 136              16                      Up
## 137               2                    Down
## 138               2                      Up
## 139               5                    Down
## 140               5                      Up
## 141               4                    Down
## 142               4                      Up
## 143               5                    Down
## 144               8                      Up
## 145              13                    Down
## 146              17                    Down
## 147              17                      Up
## 148              20                    Down
## 149              22                      Up
## 150              16                    Down
## 151              27                      Up
## 152               1                    Down
## 153               2                      Up
## 154               2                    Down
## 155               2                      Up
## 156              16                    Down
## 157              16                      Up
## 158               5                      Up
## 159               6                    Down
## 160               2                    Down
## 161               3                      Up
## 162               3                    Down
## 163               1                    Down
## 164               1                      Up
## 165              11                    Down
## 166              12                      Up
## 167               5                    Down
## 168               7                      Up
## 169               3                      Up
## 170               5                    Down
## 171               1                    Down
## 172               1                      Up
## 173               6                      Up
## 174               7                    Down
## 175              22                    Down
## 176              26                      Up
## 177               9                    Down
## 178              11                      Up
## 179              10                      Up
## 180              34                    Down
## 181              25                    Down
## 182              25                      Up
## 183              10                    Down
## 184              12                      Up
## 185              20                    Down
## 186              22                      Up
## 187               3                    Down
## 188               3                      Up
## 189               4                      Up
## 190              11                      Up
## 191              27                    Down
## 192              22                    Down
## 193              24                      Up
## 194              40                    Down
## 195              40                      Up
## 196              49                    Down
## 197              50                      Up
## 198              45                      Up
## 199              49                    Down
## 200               9                    Down
## 201              12                      Up
## 202               1                    Down
## 203               1                    Down
## 204               3                      Up
## 205              12                      Up
## 206              14                    Down
## 207              37                    Down
## 208              37                      Up
## 209              14                    Down
## 210              16                      Up
## 211               9                      Up
## 212              10                    Down
## 213              15                      Up
## 214              65                    Down
## 215              13                    Down
## 216              16                      Up
## 217               9                      Up
## 218              13                    Down
## 219              56                      Up
## 220               2                      Up
## 221               6                    Down
## 222              15                      Up
## 223              20                    Down
## 224               3                      Up
## 225               4                    Down
## 226               3                    Down
## 227               3                      Up
## 228              28                    Down
## 229              28                      Up
## 230               1                    Down
## 231               1                      Up
## 232              19                    Down
## 233              19                      Up
## 234              14                      Up
## 235              30                    Down
## 236               1                    Down
## 237              43                      Up
## 238              56                    Down
## 239               2                    Down
## 240               2                      Up
## 241               9                    Down
## 242              15                      Up
## 243              27                    Down
## 244              31                      Up
## 245              18                    Down
## 246              18                      Up
## 247              15                    Down
## 248              19                      Up
## 249               5                    Down
## 250               5                      Up
## 251               6                    Down
## 252               6                      Up
## 253               4                      Up
## 254              14                      Up
## 255              15                    Down
## 256               1                    Down
## 257               2                      Up

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the list of biomineralization-related genes. The list has been further reduced by using the package rrvgo.

cal_biomin_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")

cal_biomin_terms <- cal_biomin_terms %>% mutate(Factor = "Biomin")
head(cal_biomin_terms)
##   X.1 X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1   1 1 GO:0006807                       0                        1         11
## 2   2 2 GO:0006810                       0                        1          7
## 3   3 3 GO:0006811                       0                        1          7
## 4   4 4 GO:0006873                       0                        1          6
## 5   5 5 GO:0006996                       0                        1          6
## 6   6 6 GO:0007154                       0                        1          9
##   numInCat                                     term ontology bh_adjust
## 1       11      nitrogen compound metabolic process       BP         0
## 2        7                                transport       BP         0
## 3        7                 monoatomic ion transport       BP         0
## 4        6 intracellular monoatomic ion homeostasis       BP         0
## 5        6                   organelle organization       BP         0
## 6        9                       cell communication       BP         0
##                                      ParentTerm Factor
## 1                     primary metabolic process Biomin
## 2                                  localization Biomin
## 3                      monoatomic ion transport Biomin
## 4                           homeostatic process Biomin
## 5 cellular component organization or biogenesis Biomin
## 6                            cell communication Biomin
#cal_biomin_terms <-read.csv("/Users/imkri/Desktop/Penn Postdoc/Heron experiment/Biomineralizationgo terms.csv")
#cal_biomin_terms
# cal_up_terms <-read.csv("/Users/imkri/Desktop/Penn Postdoc/Heron experiment/goseq_pattern_calcification_filtered.csv")
# cal_up_terms<- cal_up_terms %>%
#   mutate(Factor = "Up")
# cal_up_terms
# cal_down_terms <-read.csv("/Users/imkri/Desktop/Penn Postdoc/Heron experiment/goseq_pattern_calcification_down_filtered.csv")
# cal_down_terms<-cal_down_terms %>%
#   mutate(Factor = "Down")
# cal_down_terms
colnames(cal_biomin_terms)
##  [1] "X.1"                      "X"                       
##  [3] "GOterm"                   "over_represented_pvalue" 
##  [5] "under_represented_pvalue" "numDEInCat"              
##  [7] "numInCat"                 "term"                    
##  [9] "ontology"                 "bh_adjust"               
## [11] "ParentTerm"               "Factor"

Merge biomineralization, up and down-regulation of calcification GOterms

all_terms<- merge(cal_up_terms,cal_down_terms, by=c("Factor","GOterm","X.1","X","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","bh_adjust","ParentTerm"),all=T)
all_terms<- merge(all_terms,cal_biomin_terms, by=c("Factor","GOterm","X.1","X","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","bh_adjust","ParentTerm"),all=T)
all_terms$GOterm<-as.factor(all_terms$GOterm)
head(all_terms)
##   Factor     GOterm X.1    X over_represented_pvalue under_represented_pvalue
## 1 Biomin GO:0000003  87   88            1.201233e-10                        1
## 2 Biomin GO:0000041 455  624            1.015731e-02                        1
## 3 Biomin GO:0000122 388  462            6.463204e-03                        1
## 4 Biomin GO:0000132 726 1029            1.746098e-02                        1
## 5 Biomin GO:0000226 178  184            1.981187e-06                        1
## 6 Biomin GO:0000278 248  275            1.014913e-04                        1
##   numDEInCat numInCat                                                      term
## 1          5        5                                              reproduction
## 2          1        1                            transition metal ion transport
## 3          1        1 negative regulation of transcription by RNA polymerase II
## 4          1        1              establishment of mitotic spindle orientation
## 5          3        3                     microtubule cytoskeleton organization
## 6          2        2                                        mitotic cell cycle
##   ontology    bh_adjust                                    ParentTerm
## 1       BP 1.286789e-09                                  reproduction
## 2       BP 1.269710e-02                      monoatomic ion transport
## 3       BP 1.269710e-02     negative regulation of biological process
## 4       BP 1.746098e-02 establishment or maintenance of cell polarity
## 5       BP 9.966176e-06                     microtubule-based process
## 6       BP 3.523580e-04                            mitotic cell cycle
goterms_shared <- all_terms %>%
  group_by(GOterm) %>%
  dplyr::summarise(
    ParentTerm = paste(unique(ParentTerm), collapse = ", "),
    Factor = paste(unique(Factor), collapse = ", ")
  )
head(goterms_shared)
## # A tibble: 6 × 3
##   GOterm     ParentTerm                                    Factor          
##   <fct>      <chr>                                         <chr>           
## 1 GO:0000003 reproduction                                  Biomin, Down, Up
## 2 GO:0000028 cellular component biogenesis                 Down            
## 3 GO:0000041 monoatomic ion transport                      Biomin, Down, Up
## 4 GO:0000070 cellular component organization or biogenesis Down, Up        
## 5 GO:0000075 cell cycle                                    Down, Up        
## 6 GO:0000077 cell cycle, intracellular signal transduction Down, Up
#write.csv(goterms_shared, "Merged GOterms by factor and ParentTerm.csv")
result_unique <- goterms_shared %>%
  group_by(ParentTerm,Factor) %>%
  dplyr::summarise(SharedGOterms = n_distinct(GOterm))%>%
  arrange(-SharedGOterms)
## `summarise()` has grouped output by 'ParentTerm'. You can override using the
## `.groups` argument.
result_unique
## # A tibble: 445 × 3
## # Groups:   ParentTerm [243]
##    ParentTerm                                          Factor      SharedGOterms
##    <chr>                                               <chr>               <int>
##  1 cell cycle                                          Down, Up               31
##  2 small molecule metabolic process                    Down, Up               29
##  3 RNA processing                                      Down, Up               27
##  4 regulation of localization                          Down, Up               25
##  5 response to chemical, response to organic substance Down, Up               25
##  6 regulation of molecular function                    Down, Up               24
##  7 regulation of metabolic process                     Biomin, Do…            22
##  8 catabolic process                                   Down, Up               20
##  9 regulation of metabolic process                     Down, Up               20
## 10 anatomical structure morphogenesis                  Biomin, Do…            19
## # ℹ 435 more rows
result_filtered_up<- result_unique %>%
  dplyr::filter(Factor=="Biomin, Down, Up" | Factor=="Biomin, Up")
 #dplyr::filter(SharedGOterms>=5) 
result_filtered_up
## # A tibble: 138 × 3
## # Groups:   ParentTerm [125]
##    ParentTerm                                               Factor SharedGOterms
##    <chr>                                                    <chr>          <int>
##  1 regulation of metabolic process                          Biomi…            22
##  2 anatomical structure morphogenesis                       Biomi…            19
##  3 circulatory system development, system development       Biomi…            19
##  4 regulation of molecular function                         Biomi…            17
##  5 negative regulation of biological process                Biomi…            16
##  6 reproduction                                             Biomi…            16
##  7 cellular catabolic process, catabolic process            Biomi…            15
##  8 response to cytokine, response to chemical, response to… Biomi…            15
##  9 tissue development                                       Biomi…            14
## 10 cell projection organization                             Biomi…            13
## # ℹ 128 more rows
result_filtered_down<- result_unique %>%
  dplyr::filter(Factor=="Biomin, Down, Up" | Factor=="Biomin, Down")
 #dplyr::filter(SharedGOterms>=5) 
result_filtered_down
## # A tibble: 132 × 3
## # Groups:   ParentTerm [125]
##    ParentTerm                                               Factor SharedGOterms
##    <chr>                                                    <chr>          <int>
##  1 regulation of metabolic process                          Biomi…            22
##  2 anatomical structure morphogenesis                       Biomi…            19
##  3 circulatory system development, system development       Biomi…            19
##  4 regulation of molecular function                         Biomi…            17
##  5 negative regulation of biological process                Biomi…            16
##  6 reproduction                                             Biomi…            16
##  7 cellular catabolic process, catabolic process            Biomi…            15
##  8 response to cytokine, response to chemical, response to… Biomi…            15
##  9 tissue development                                       Biomi…            14
## 10 cell projection organization                             Biomi…            13
## # ℹ 122 more rows

Calculate the percentage of shared GOterms upregulation of calcification

merged_up <- result_filtered_up %>%
  tidyr::separate_rows(ParentTerm, sep = ", ") %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE))%>%
  left_join(cal_up_terms_parent_nterm, by = "ParentTerm") %>%
  mutate(Proportion.of.shared.GO.terms.with.biomineralization.genes = Sum_of_SharedGOterms / Number.of.terms)%>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100)
merged_up_clean<- na.omit(merged_up)
head(merged_up_clean)
## # A tibble: 6 × 6
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 actin filament-ba…                    5              12 Up                    
## 2 aging                                 1               2 Up                    
## 3 amide metabolic p…                    2              12 Up                    
## 4 ammonium ion meta…                    1               1 Up                    
## 5 anatomical struct…                   23              28 Up                    
## 6 animal organ deve…                   12              16 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Calculate the percentage of shared GOterms downregulation of calcification

merged_down <- result_filtered_down %>%
  tidyr::separate_rows(ParentTerm, sep = ", ") %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE))%>%
  left_join(cal_down_terms_parent_nterm, by = "ParentTerm") %>%
  mutate(Proportion.of.shared.GO.terms.with.biomineralization.genes = Sum_of_SharedGOterms / Number.of.terms)%>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100)
merged_down_clean<- na.omit(merged_down)
head(merged_down_clean)
## # A tibble: 6 × 6
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 actin filament-ba…                    4              10 Down                  
## 2 aging                                 1               2 Down                  
## 3 amide metabolic p…                    2              12 Down                  
## 4 ammonium ion meta…                    1               1 Down                  
## 5 anatomical struct…                   23              23 Down                  
## 6 animal organ deve…                   12              16 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Frequency >10 up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  filter(Number.of.terms>=10) %>%
  filter(Calcification.direction=="Up")
head(cal_freq_terms_filtered_up)
## # A tibble: 6 × 6
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 actin filament-ba…                    5              12 Up                    
## 2 amide metabolic p…                    2              12 Up                    
## 3 anatomical struct…                   23              28 Up                    
## 4 animal organ deve…                   12              16 Up                    
## 5 biosynthetic proc…                    1              20 Up                    
## 6 carbohydrate deri…                    4              22 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,70))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_KB.pdf", plot=freq_fig_up, dpi=300, height=20, width=10, units="in", limitsize=FALSE)

Frequency >10 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  filter(Number.of.terms>=10) %>% 
  filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 55 × 6
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 actin filament-b…                    4              10 Down                  
##  2 amide metabolic …                    2              12 Down                  
##  3 anatomical struc…                   23              23 Down                  
##  4 animal organ dev…                   12              16 Down                  
##  5 biosynthetic pro…                    1              24 Down                  
##  6 carbohydrate der…                    4              17 Down                  
##  7 catabolic process                   15              39 Down                  
##  8 cell cycle                           9              41 Down                  
##  9 cell projection …                   13              20 Down                  
## 10 cell-cell signal…                    8              19 Down                  
## # ℹ 45 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,70))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_KB.pdf", plot=freq_fig_down, dpi=300, height=20, width=10, units="in", limitsize=FALSE)
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs